{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "#导入必要的工具包\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import scipy.io as sio\n",
    "import scipy.sparse as ss\n",
    "\n",
    "#相似度/距离\n",
    "import scipy.spatial.distance as ssd\n",
    "\n",
    "from collections import defaultdict\n",
    "from sklearn.preprocessing import normalize\n",
    "from sklearn.cluster import MiniBatchKMeans\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn import metrics\n",
    "\n",
    "from sklearn.decomposition import PCA\n",
    "import time\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>event_id</th>\n",
       "      <th>c_1</th>\n",
       "      <th>c_2</th>\n",
       "      <th>c_3</th>\n",
       "      <th>c_4</th>\n",
       "      <th>c_5</th>\n",
       "      <th>c_6</th>\n",
       "      <th>c_7</th>\n",
       "      <th>c_8</th>\n",
       "      <th>c_9</th>\n",
       "      <th>...</th>\n",
       "      <th>c_92</th>\n",
       "      <th>c_93</th>\n",
       "      <th>c_94</th>\n",
       "      <th>c_95</th>\n",
       "      <th>c_96</th>\n",
       "      <th>c_97</th>\n",
       "      <th>c_98</th>\n",
       "      <th>c_99</th>\n",
       "      <th>c_100</th>\n",
       "      <th>c_other</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>684921758</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>244999119</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3928440935</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2582345152</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1051165850</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 102 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     event_id  c_1  c_2  c_3  c_4  c_5  c_6  c_7  c_8  c_9   ...     c_92  \\\n",
       "0   684921758    2    0    2    0    0    0    0    0    0   ...        0   \n",
       "1   244999119    2    0    2    0    0    0    0    0    0   ...        0   \n",
       "2  3928440935    0    0    0    0    0    0    0    0    0   ...        0   \n",
       "3  2582345152    1    0    2    1    0    0    0    0    0   ...        0   \n",
       "4  1051165850    1    1    0    0    0    0    0    2    0   ...        0   \n",
       "\n",
       "   c_93  c_94  c_95  c_96  c_97  c_98  c_99  c_100  c_other  \n",
       "0     1     0     0     0     0     0     0      0        9  \n",
       "1     0     0     0     0     0     0     0      0        7  \n",
       "2     0     0     0     0     0     0     0      0       12  \n",
       "3     0     0     0     0     0     0     0      0        8  \n",
       "4     0     0     0     0     0     0     0      0        9  \n",
       "\n",
       "[5 rows x 102 columns]"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load the event feature table. Per the saved preview it holds an event_id\n",
    "# column followed by c_1..c_100 and c_other (102 columns in total).\n",
    "events = pd.read_csv(filepath_or_buffer='X_events.csv')\n",
    "events.head(n=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Separate the identifier column from the feature columns.\n",
    "# NOTE(review): event_id looks like a per-row identifier rather than a class\n",
    "# label - confirm before treating y_train as ground truth.\n",
    "y_train = events.loc[:, 'event_id']\n",
    "train = events.drop(labels=['event_id'], axis='columns')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "the shape of train_image: (13418, 101)\n"
     ]
    }
   ],
   "source": [
    "# Report the size of the feature matrix as (number of samples, number of features).\n",
    "print('the shape of train_image: {}'.format((len(train.index), len(train.columns))))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Anaconda3\\lib\\site-packages\\sklearn\\model_selection\\_split.py:2026: FutureWarning: From version 0.21, test_size will always complement train_size unless both are specified.\n",
      "  FutureWarning)\n"
     ]
    }
   ],
   "source": [
    "# Split the data into an 80% training part and a 20% validation part; the split is\n",
    "# used while searching for the best hyper-parameter (the number of clusters K).\n",
    "# test_size is passed explicitly as the complement of train_size so the ratio is\n",
    "# unambiguous and scikit-learn's FutureWarning about the implicit complement is avoided.\n",
    "X_train_part, X_val, y_train_part, y_val = train_test_split(\n",
    "    train, y_train, train_size=0.8, test_size=0.2, random_state=0\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Evaluate one hyper-parameter point: a MiniBatchKMeans model with K clusters.\n",
    "def K_cluster_analysis(K, X_train, y_train, X_val, y_val, random_state=0):\n",
    "    \"\"\"Fit MiniBatchKMeans with K clusters on X_train and return its silhouette score.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    K : int\n",
    "        Number of clusters passed to MiniBatchKMeans.\n",
    "    X_train : array-like\n",
    "        Feature matrix used both to fit the model and to compute the score.\n",
    "    y_train, X_val, y_val\n",
    "        Accepted so existing calls keep working; the silhouette score needs no\n",
    "        labels or held-out data, so they are not used.\n",
    "    random_state : int or None, default 0\n",
    "        Seed forwarded to MiniBatchKMeans so that re-running the notebook gives\n",
    "        the same clustering. Pass None for an unseeded run.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    float\n",
    "        Silhouette coefficient of the cluster assignment on X_train (higher is better).\n",
    "    \"\"\"\n",
    "    start = time.time()\n",
    "    print(\"K-means begin with clusters: {}\".format(K))\n",
    "\n",
    "    # Fit on the training part, then label the same rows for scoring.\n",
    "    mb_kmeans = MiniBatchKMeans(n_clusters=K, random_state=random_state)\n",
    "    mb_kmeans.fit(X_train)\n",
    "    train_labels = mb_kmeans.predict(X_train)\n",
    "\n",
    "    # The metric is the silhouette coefficient, not Calinski-Harabasz, so it is\n",
    "    # named and reported as such.\n",
    "    silhouette = metrics.silhouette_score(X_train, train_labels)\n",
    "\n",
    "    elapsed = int(time.time() - start)\n",
    "    print(\"silhouette_score: {}, time elaps:{}\".format(silhouette, elapsed))\n",
    "\n",
    "    return silhouette"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "K-means begin with clusters: 10\n",
      "CH_score: 0.41381562600902716, time elaps:4\n",
      "K-means begin with clusters: 20\n",
      "CH_score: 0.23048918188033218, time elaps:4\n",
      "K-means begin with clusters: 30\n",
      "CH_score: 0.23244518198693775, time elaps:4\n",
      "K-means begin with clusters: 40\n",
      "CH_score: 0.20498461032011683, time elaps:4\n",
      "K-means begin with clusters: 50\n",
      "CH_score: 0.13992812835771584, time elaps:4\n",
      "K-means begin with clusters: 60\n",
      "CH_score: 0.10661577403504381, time elaps:4\n",
      "K-means begin with clusters: 70\n",
      "CH_score: 0.09672151036156788, time elaps:4\n",
      "K-means begin with clusters: 80\n",
      "CH_score: 0.09817040094258345, time elaps:4\n",
      "K-means begin with clusters: 90\n",
      "CH_score: 0.09566331390321296, time elaps:4\n",
      "K-means begin with clusters: 100\n",
      "CH_score: 0.05611414917762623, time elaps:4\n"
     ]
    }
   ],
   "source": [
    "# Candidate numbers of clusters to scan: 10, 20, ..., 100.\n",
    "Ks = range(10, 101, 10)\n",
    "\n",
    "# One score per K, in the same order as Ks. K_cluster_analysis returns a\n",
    "# silhouette score, so despite the name these are not Calinski-Harabasz values.\n",
    "CH_scores = []\n",
    "for K in Ks:\n",
    "    CH_scores.append(K_cluster_analysis(K, X_train_part, y_train_part, X_val, y_val))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[<matplotlib.lines.Line2D at 0x1df000d4e80>]"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX0AAAD8CAYAAACb4nSYAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAHp1JREFUeJzt3XmYVNWd//H3l2aV3dA6yi5pDbjBWOIWfdSg4MLiGBBNlGSSIU7A5adOxOjECYlxy2iSR9QwhhiTKEFc0qMZkVHUGKNSuCVACC0itLh0BpcZFwjw/f1xboXqpqBv09V9q+p+Xs9TT/fdqr9dFJ+6fe6555i7IyIi6dAh6QJERKT9KPRFRFJEoS8ikiIKfRGRFFHoi4ikiEJfRCRFFPoiIimi0BcRSRGFvohIinRMuoCm+vXr50OGDEm6DBGRsrJs2bK/uHt1c/uVXOgPGTKEbDabdBkiImXFzF6Ps5+ad0REUkShLyKSIgp9EZEUUeiLiKSIQl9EJEUU+iIiKaLQFxFJkYoJ/Y0b4dvfhldeSboSEZHSVTGhbwbXXAM//3nSlYiIlK6KCf2+feGkk+Dee0FzvYuIFFYxoQ8wZQq8/josXZp0JSIipamiQn/iROjUKZzti4jIjmKFvpmNM7NVZlZnZrN2sd/nzczNLJO37orouFVmNrYYRe9Mnz5q4hER2ZVmQ9/MqoA5wCnACOBsMxtRYL+ewIXAc3nrRgBTgQOBccCt0fO1GTXxiIjsXJwz/dFAnbuvcffNwHxgYoH9vgPcAHySt24iMN/dN7n7a0Bd9HxtJtfEs2BBW/4UEZHyFCf0+wPr85bro3V/Y2ajgIHu/lBLjy22Pn3g5JPVxCMiUkic0LcC6/4Wp2bWAbgZuLSlx+Y9x3Qzy5pZtqGhIUZJuzZ5MqxbB88/3+qnEhGpKHFCvx4YmLc8ANiQt9wTOAh4wszWAkcCtdHF3OaOBcDd57p7xt0z1dXNzvbVLPXiEREpLE7oLwVqzGyomXUmXJitzW109/fdvZ+7D3H3IcCzwAR3z0b7TTWzLmY2FKgB2vz8W008IiKFNRv67r4FmAksAlYCC9x9uZnNNrMJzRy7HFgArAAeAWa4+9bWl928KVPUxCMi0pR5iZ0KZzIZL8bE6O+9B3vtBRdeCN//fhEKExEpYWa2zN0zze1XUXfk5uvTB8aOVROPiEi+ig19UC8eEZGmKjr0J0zQjVoiIvkqOvTVxCMi0lhFhz6EJp716+G555rfV0Sk0lV86E+YAJ0760YtERFIQejn36i1bVvS1YiIJKviQx/CjVrr16sXj4hIKkJfTTwiIkEqQr937+29eNTEIyJplorQh+29eNTEIyJplprQzzXx6EYtEUmz1IR+roln4UI18YhIeqUm9EE3aomIpCr01YtHRNIuVaGvXjwiknapCn0IN2rV16uJR0TSKXWhP368evGISHqlLvR794Zx49SLR0TSKVbom9k4M1tlZnVmNqvA9vPN7A9m9pKZPW1mI6L1Q8zs42j9S2Z2e7F/gd0xebKaeEQknZoNfTOrAuYApwAjgLNzoZ7nbnc/2N1HAjcAN+Vte9XdR0aP84tVeGtMmABduqiJR0TSJ86Z/migzt3XuPtmYD4wMX8Hd/8gb7E7UNLzVPXqpRu1RCSd4oR+f2B93nJ9tK4RM5thZq8SzvQvzNs01MxeNLMnzezYVlVbRLlePM8+m3QlIiLtJ07oW4F1O5zJu/scdx8GXA5cFa1+Exjk7qOAS4C7zazXDj/AbLqZZc0s29DQEL/6Vhg/PjTx6EYtEUmTOKFfDwzMWx4AbNjF/vOBSQDuvsnd/yf6fhnwKrB/0wPcfa67Z9w9U11dHbf2Vsk18ehGLRFJkzihvxSoMbOhZtYZmArU5u9gZjV5i6cBq6P11dGFYMxsP6AGWFOMwothyhR44w018YhIenRsbgd332JmM4FF
QBUwz92Xm9lsIOvutcBMMxsD/BV4F5gWHX4cMNvMtgBbgfPdfWNb/CK7I9fEs2ABHH100tWIiLQ9cy+tjjaZTMaz2Wy7/bxJkyCbhXXroEPqblUTkUphZsvcPdPcfqmPucmTQxPP73+fdCUiIm0v9aGvXjwikiapD/1evTQWj4ikR+pDH7b34lETj4hUOoU+auIRkfRQ6AM9e8Ipp+hGLRGpfAr9yOTJsGGDmnhEpLIp9CP5N2qJiFQqhX4k18SjXjwiUskU+nlyTTzPPJN0JSIibUOhn0e9eESk0in086iJR0QqnUK/iSlT1MQjIpVLod/E6aeriUdEKpdCv4mePeHUU9XEIyKVSaFfgHrxiEilUugXcPrp0LWrbtQSkcqj0C9AvXhEpFLFCn0zG2dmq8yszsxmFdh+vpn9wcxeMrOnzWxE3rYrouNWmdnYYhbfliZPhjffhN/9LulKRESKp9nQN7MqYA5wCjACODs/1CN3u/vB7j4SuAG4KTp2BDAVOBAYB9waPV/JyzXxqBePiFSSOGf6o4E6d1/j7puB+cDE/B3c/YO8xe5Abrb1icB8d9/k7q8BddHzlTw18YhIJYoT+v2B9XnL9dG6Rsxshpm9SjjTv7Alx5aqKVPUxCMilSVO6FuBdb7DCvc57j4MuBy4qiXHmtl0M8uaWbahoSFGSe1DvXhEpNLECf16YGDe8gBgwy72nw9Masmx7j7X3TPunqmuro5RUvvo0SPcqHXffWriEZHKECf0lwI1ZjbUzDoTLszW5u9gZjV5i6cBq6Pva4GpZtbFzIYCNcDzrS+7/agXj4hUko7N7eDuW8xsJrAIqALmuftyM5sNZN29FphpZmOAvwLvAtOiY5eb2QJgBbAFmOHuW9vod2kT+U08xx6bdDUiIq1j7js0sScqk8l4NptNuoxGzjwzzJ27fj1UlUWHUxFJGzNb5u6Z5vbTHbkxqBePiFQKhX4Mp52mG7VEpDIo9GPI9eJZuBC2ltUVCRGRxhT6MU2ZAm+9pSYeESlvCv2Yck08ulFLRMqZQj+mHj1C8N93n5p4RKR8KfRbYPLk0MTz9NNJVyIisnsU+i1w2mnQrZt68YhI+VLot0D+WDxq4hGRcqTQb6FcLx418YhIOVLot5CaeESknCn0W6h79xD8ulFLRMqRQn83TJ4Mb7+tJh4RKT8K/d2Qa+LRjVoiUm4U+rsh18SjXjwiUm4U+rsp18Tz298mXYmISHwK/d2kXjwiUo4U+rtJTTwiUo4U+q0wZYqaeESkvMQKfTMbZ2arzKzOzGYV2H6Jma0ws1fM7DEzG5y3bauZvRQ9aotZfNJOPVVNPCJSXpoNfTOrAuYApwAjgLPNbEST3V4EMu5+CLAQuCFv28fuPjJ6TChS3SWhe3c4/XQ18YhI+Yhzpj8aqHP3Ne6+GZgPTMzfwd2XuPtH0eKzwIDillm61ItHRMpJnNDvD6zPW66P1u3MV4D/ylvuamZZM3vWzCbtRo0l7dRTYY89dKOWiJSHOKFvBdZ5wR3NvghkgBvzVg9y9wxwDvADMxtW4Ljp0QdDtqGhIUZJpUO9eESknMQJ/XpgYN7yAGBD053MbAxwJTDB3Tfl1rv7hujrGuAJYFTTY919rrtn3D1TXV3dol+gFEyeDO+8A089lXQlIiK7Fif0lwI1ZjbUzDoDU4FGvXDMbBTwY0Lgv5O3vq+ZdYm+7wccA6woVvGlItfEo148IlLqmg19d98CzAQWASuBBe6+3Mxmm1muN86NQA/g3iZdM4cDWTN7GVgCXOfuFRf6auIRkXJh7gWb5xOTyWQ8m80mXUaLLVwYmnkefxxOOCHpakQkbcxsWXT9dJd0R26RqBePiJQDhX6R7LFHuFHr/vvVxCMipUuhX0TqxSMipa5j0gVUkvwmnrZu13eHTZvgk0+2Pz7+uPFyjx6QyYAVutNCRFJJoV9EuSae++6DL3yhcQDvLJh3tX5X+27a1Hw9AIceCpdcAlOnQufO
bfv7i0jpU++dInvgAfiHf4i3b1VVGKWza9fGj2Ktq6uDm2+G5cth333hggvga1+Dvn3b9jUQkfYXt/eOQr/I3EO3zW3bmg/pju3wd5Y7PPoo/Pu/w+LF4a+Rf/xHuPhiGLbDgBgiUq4U+rKDV16Bm26Cu+8OPYwmTYJLL4Wjj066MhFpLfXTlx0ccgjceSesXQuXXw5LlsAxx8BRR4Wby9TVVKTyKfRTaN994Xvfg/Xr4ZZboKEhdDetqYEf/Qj+93+TrlBE2opCP8W6d4cZM2DVqnBT2b77wkUXwcCB4S+BN95IukIRKTaFvlBVBWecAU8/Dc8+CyefDN//PgwZAueeCy+9lHSFIlIsCn1p5Igjws1ldXXhr4AHH4RRo+Bzn4Pf/Cb0ShKR8qXQl4KGDoUf/CC0+99wQ2gCOu00OOgguOOOcJOYiJQfhb7sUp8+8C//Aq+9Br/4Rbi/4J/+CQYPhtmzw0VgESkfCn2JpVOnMLTEsmXh5rPDD4err4ZBg8JdvqtWJV2hiMSh0JcWMQuDyT30EKxYES70/uxn8JnPwPjx8MQT4S5gESlNCn3ZbcOHw9y5sG4d/Nu/wXPPhQ+ETCbc9fvXvyZdoYg0pdCXVttrr9DU8/rr4UPgo49CU9B++8GNN8L77yddoYjkxAp9MxtnZqvMrM7MZhXYfomZrTCzV8zsMTMbnLdtmpmtjh7Tilm8lJZu3cJF3uXLQ/NPTQ184xuhx8/atUlXJyIQI/TNrAqYA5wCjADONrMRTXZ7Eci4+yHAQuCG6Ng9gauBI4DRwNVmpoF9K1yHDqF75+OPwzPPwIcfwpgx8OabSVcmInHO9EcDde6+xt03A/OBifk7uPsSd/8oWnwWGBB9PxZY7O4b3f1dYDEwrjilSzk46qhwU9dbb4U7fTduTLoikXSLE/r9gfV5y/XRup35CvBfLTnWzKabWdbMsg3q+F1xjjwSamth9Wo45RQN6CaSpDihX2iG1YKd8szsi0AGuLElx7r7XHfPuHumuro6RklSbk48MQzvsGwZTJgQpnwUkfYXJ/TrgYF5ywOADU13MrMxwJXABHff1JJjJR0mTIC77oInn4QpU9SlUyQJcUJ/KVBjZkPNrDMwFajN38HMRgE/JgT+O3mbFgEnm1nf6ALuydE6SalzzoFbbw29e847TxO3iLS3ZmdpdfctZjaTENZVwDx3X25ms4Gsu9cSmnN6APeaGcA6d5/g7hvN7DuEDw6A2e6uS3kpd/75oe/+rFnQqxfcfnu401dE2l6sqbnd/TfAb5qs+1be92N2cew8YN7uFiiV6fLLQ/Bfey307g3XX6/gF2kPsUJfpC1ccw188EG4a7d3b7jyyqQrEql8Cn1JjFmYk/eDD+Cqq0JTzwUXJF2VSGVT6EuiOnSAefNC3/0LLwxn/Oedl3RVIpVLA65J4jp2hHvuCVMyfvnL8MADSVckUrkU+lISunYN8/EecQRMnQqLFyddkUhlUuhLyejRAx5+OIzTP2kS/O53SVckUnkU+lJS+vaFRYtgwIAwUueLLyZdkUhlUehLydl779C806sXjB0Lf/pT0hWJVA6FvpSkQYPgv/87dOs86aQwK5eItJ5CX0rW/vvDo4/C//1fmITlrbeSrkik/Cn0paQdemiYhOXNNzUJi0gxKPSl5B11VOjOuWoVnHqqJmERaQ2FvpSFMWPgV7+CbDZ05/zkk6QrEilPCn0pG5MmwU9/GiZc1yQsIrtHoS9l5dxzYc4c+M//hC99CbZtS7oikfKiAdek7Hz962Es/m9+M/Tlv/VWjcUvEpdCX8rSFVeE4L/++jAy53XXJV2RSHlQ6EvZuvbaxsF/xRVJVyRS+mK16ZvZODNbZWZ1ZjarwPbjzOwFM9tiZp9vsm2rmb0UPWqbHiuyu8xC+/4554Smnjlzkq5IpPQ1e6ZvZlXAHOAkoB5Yama17r4ib7d1wJeAywo8xcfuPrIItYrsoEMHuPPOcNfuzJmhjf/c
c5OuSqR0xTnTHw3Uufsad98MzAcm5u/g7mvd/RVAfSmk3XXqFPrwn3himITlwQeTrkikdMUJ/f7A+rzl+mhdXF3NLGtmz5rZpBZVJxJTbhKWTAbOOisM1iYiO4oT+oU6w3kLfsYgd88A5wA/MLNhO/wAs+nRB0O2oaGhBU8tsl3PnmGcngMOgIkT4fe/T7oikdITJ/TrgYF5ywOADXF/gLtviL6uAZ4ARhXYZ667Z9w9U11dHfepRXaw555hZM599w3j9Lz8ctIViZSWOKG/FKgxs6Fm1hmYCsTqhWNmfc2sS/R9P+AYYMWujxJpnb/7u9C806NHGJnzz39OuiKR0tFs6Lv7FmAmsAhYCSxw9+VmNtvMJgCY2eFmVg9MBn5sZsujw4cDWTN7GVgCXNek149Imxg8OAS/exisbd26pCsSKQ3m3pLm+baXyWQ8m80mXYZUiBdfhBNOgL32gt/+NkzFKFKJzGxZdP10lzTgmlS0UaPg4YfhjTfChCy33abROSXdFPpS8Y45Bp56Kky/+PWvw0EHwf33h6YfkbRR6EsqHHYYPPkk1NZCVRWceWb4MHj66aQrE2lfCn1JDTMYPx5eeQX+4z9g7Vo49tgwOcvKlUlXJ9I+FPqSOh07wle/CqtXw3e/G2biOugg+NrXwgTsIpVMoS+p1b07XHklvPoqzJgRpmL89KfhW9/S5OtSuRT6knrV1fCjH4UmnvHj4TvfgWHD4JZbYPPmpKsTKS6Fvkhk2DCYPx+efx4OPBAuuABGjIB771VPH6kcCn2RJg4/PLTzP/wwdOsGU6bAkUeG3j8i5U6hL1KAWRiw7aWXYN68cHPX8ceH5p/ly5s9XKRkKfRFdqGqKkzMsnp1mJP3qafgkENC75833ki6OpGWU+iLxNCtG8yaBWvWwEUXwV13QU1NmJv3/feTrk4kPoW+SAt86lNw002wahWccUY4+x82DH74Q/X0kfKg0BfZDUOHwi9/CdksjBwJF18Mw4eH3j/bNFO0lDCFvkgrHHYYLF4MjzwSJm05+2w44ghYsiTpykQKU+iLtJIZjB0LL7wAP/sZvP02nHhi6P3zhz8kXZ1IYwp9kSKpqoLzzgvTM954Y5iY/dBDQ++f9euTrk4kUOiLFFnXrnDZZWFMn0svhbvvDmP5z5oF772XdHWSdpouUaSNvf46/Ou/wi9+AX37wmmnhYu+w4eHYR722y+M/CnSGkWdLtHMxpnZKjOrM7NZBbYfZ2YvmNkWM/t8k23TzGx19JgW/1cQqQyDB4d+/S+8EO7qXbIk9O8/4ww44IAw2ufBB4fhHq6+Gn71qzDm/yefJF25VKJmz/TNrAr4M3ASUA8sBc529xV5+wwBegGXAbXuvjBavyeQBTKAA8uAw9z93Z39PJ3pSxp88AH86U9hZM8VK7Z/fe217V0+O3QIfwXk/iLI/XUwfDj07Jls/VJ64p7px/mjcjRQ5+5roieeD0wE/hb67r422ta0h/JYYLG7b4y2LwbGAffE+LkiFatXLxg9OjzyffJJuBCc+yDIfRg88kjjCd0HDGj8YZD72q9f+/4eUn7ihH5/IL/vQT1wRMznL3Rs/5jHiqRO165hbJ9DDmm8fsuWcGE4/4Ng5Uq44w748MPt+/Xr1/ivgtz3/fuHrqWlYNu2cPfy5s2wdWsY4qJLl9Kpr9LFCf1C/xRxr/7GOtbMpgPTAQYNGhTzqUXSo2PH0P5/wAFhTt+cbdtCd9D8D4KVK2HBAng3rxG1Z8/GzUP77x+eMxe+rXls2tSy/bdu3fH369AB9tgjPLp3L/7Xbt1Cl1qJF/r1wMC85QHAhpjPXw8c3+TYJ5ru5O5zgbkQ2vRjPrdI6nXoEC4UDx4M48ZtX+8O77yz4zWDRx8NN5DFZRbOwjt3bv7RvXvonZRbjnNchw6hSevDD+Gjjwp/feutHdd//HHLX6uuXXf+wVJTEy6iV1e3/HnLTZzQXwrUmNlQ4A1gKnBOzOdfBHzPzPpG
yycDV7S4ShFpETPYe+/wOP74xtveew/q6sL3uwrkLl1K9+x427YQ/Dv7sNjZB0ihr++/D3PnhnGTfvhDOOecym5qajb03X2Lmc0kBHgVMM/dl5vZbCDr7rVmdjjwANAXGG9m33b3A919o5l9h/DBATA7d1FXRJLRpw9kmu3jUdo6dAhn6d27F+f5li8PcyR88YthIL3bb4dKbWnWzVkiIoRrDXPmwBVXhA+V666Df/7n8H05KOrNWSIila6qCi68MJz1H300zJwJxx0X7qeoJAp9EZE8Q4aE+yLuvDNc/D70ULjmmsb3SZQzhb6ISBNmMG1a6PU0cSJcdVW4DlIJLc8KfRGRndh773DPwwMPQENDmCDnG98IvX7KlUJfRKQZkyaFpp6vfCXMlXDIIeU7O5pCX0Qkhj59Qn/+xx8PyyeeCNOnl98cCQp9EZEWOOGEMPT1ZZfBT34CBx4Iv/510lXFp9AXEWmhPfYIzTzPPRcGuZs0Cc46K8yPXOoU+iIiuynXo+e734UHHwyD2d11Vxj7qFQp9EVEWqFTJ7jySnj55TCU9bRpYfC7tWuTrqwwhb6ISBF85jPw1FNwyy3wzDNw0EFhALdCQ0knSaEvIlIkHTrAjBlhKIfjjoOLL4bPfjZ09ywVCn0RkSIbNAgefhh+/nNYvRpGjoTZs8MkMklT6IuItAGzMFTzihVw5plhkpbDDoPnn0+2LoW+iEgb2msvuOceqK0NU1gedRRccknjuY3bk0JfRKQdjB8f2vqnT4ebb4aDD4bHHmv/OhT6IiLtpHdvuO02ePLJMDH9mDFhPJ/8SezbmkJfRKSdHXdc6Nc/a1aYqH7ECLj//vb52Qp9EZEEdOsG114LS5fCPvuEi71TpoRJ39tSrNA3s3FmtsrM6sxsVoHtXczsV9H258xsSLR+iJl9bGYvRY/bi1u+iEh5GzUqjOFz3XWw//5tPydvx+Z2MLMqYA5wElAPLDWzWnfPv93gK8C77v5pM5sKXA+cFW171d1HFrluEZGK0akTXH55+/ysOJ8po4E6d1/j7puB+cDEJvtMBH4Wfb8Q+JyZWfHKFBGRYogT+v2B9XnL9dG6gvu4+xbgfeBT0bahZvaimT1pZscW+gFmNt3MsmaWbWhoaNEvICIi8cUJ/UJn7E0HDt3ZPm8Cg9x9FHAJcLeZ9dphR/e57p5x90x1dXWMkkREZHfECf16YGDe8gBgw872MbOOQG9go7tvcvf/AXD3ZcCrwP6tLVpERHZPnNBfCtSY2VAz6wxMBWqb7FMLTIu+/zzwuLu7mVVHF4Ixs/2AGmBNcUoXEZGWarb3jrtvMbOZwCKgCpjn7svNbDaQdfda4CfAz82sDthI+GAAOA6YbWZbgK3A+e6+sS1+ERERaZ55ic3rlclkPJvNJl2GiEhZMbNl7p5pbj/dkSsikiIld6ZvZg3A60nX0Ur9gL8kXUQJ0evRmF6P7fRaNNaa12Owuzfb/bHkQr8SmFk2zp9ZaaHXozG9HtvptWisPV4PNe+IiKSIQl9EJEUU+m1jbtIFlBi9Ho3p9dhOr0Vjbf56qE1fRCRFdKYvIpIiCv1WMrOBZrbEzFaa2XIzuyhav6eZLTaz1dHXvknX2l7MrCoaWfWhaHloNLnO6miync5J19hezKyPmS00sz9F75GjUv7e+H/R/5M/mtk9ZtY1Te8PM5tnZu+Y2R/z1hV8P1jwo2hyqlfM7O+LUYNCv/W2AJe6+3DgSGCGmY0AZgGPuXsN8Fi0nBYXASvzlq8Hbo5ei3cJk+6kxQ+BR9z9M8ChhNclle8NM+sPXAhk3P0gwrAuuUmX0vL+uBMY12Tdzt4PpxDGK6sBpgO3FaUCd9ejiA/g14RZxlYB+0Tr9gFWJV1bO/3+A6I37onAQ4Rht/8CdIy2HwUsSrrOdnotegGvEV07y1uf1vdGbt6NPQnjfj0EjE3b+wMYAvyxufcD8GPg7EL7teahM/0iiuYGHgU8B+zt7m8CRF/3
Sq6ydvUD4BtAbnrnTwHveZhcBwpPwlOp9gMagJ9GzV13mFl3UvrecPc3gO8D6whzbbwPLCO974+cnb0f4kxg1WIK/SIxsx7AfcDF7v5B0vUkwcxOB97xMHfC31YX2DUtXcY6An8P3OZhIqEPSUlTTiFRW/VEYCiwL9Cd0ITRVFreH81pk/87Cv0iMLNOhMD/pbvfH61+28z2ibbvA7yTVH3t6BhggpmtJcylfCLhzL9PNLkOFJ6Ep1LVA/Xu/ly0vJDwIZDG9wbAGOA1d29w978C9wNHk973R87O3g9xJrBqMYV+K0UTwP8EWOnuN+Vtyp9YZhqhrb+iufsV7j7A3YcQLtA97u5fAJYQJteBlLwWAO7+FrDezA6IVn0OWEEK3xuRdcCRZrZH9P8m93qk8v2RZ2fvh1rgvKgXz5HA+7lmoNbQzVmtZGafBX4L/IHt7djfJLTrLwAGEd7skz1FE8iY2fHAZe5+ejRr2nzCBbwXgS+6+6Yk62svZjYSuAPoTJg17suEk61UvjfM7NvAWYReby8CXyW0U6fi/WFm9wDHE0bTfBu4GniQAu+H6IPxFkJvn4+AL7t7qycbUeiLiKSImndERFJEoS8ikiIKfRGRFFHoi4ikiEJfRCRFFPoiIimi0BcRSRGFvohIivx/qSBUZxmSlF4AAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Plot the score obtained for each candidate number of clusters K;\n",
    "# the preferred K is the one with the highest score.\n",
    "plt.plot(Ks, np.array(CH_scores), 'b-')\n",
    "plt.xlabel('number of clusters K')\n",
    "plt.ylabel('silhouette score')\n",
    "plt.title('MiniBatchKMeans: silhouette score vs. K')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
